Practical Lab 3 - Vanilla CNN and Fine-Tune VGG16 - for Dogs and Cats Classification
CSCN8010, Section 2 – Foundations of Machine Learning Frameworks
Name : Pradeepti Kasam
Student ID : 8965985
1. Obtain the Data: Get the Dogs vs Cats dataset (see CSCN8010 class notebook)
- The Dataset (with an Open Data license)
#importing necessary libraries
import os, shutil, pathlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from random import choice
from PIL import Image
import cv2
import hashlib
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import confusion_matrix, classification_report, average_precision_score, precision_recall_curve
#Dataset contains 25000 images. Extracting only 5000 images which are split into 1000 training images, 500 validation images and 1000 test images per class.
# Source directory holding the full Kaggle "Dogs vs Cats" training images.
original_dir = pathlib.Path("train")
# Destination root under which the reduced train/validation/test subsets are built.
new_base_dir = pathlib.Path("kaggle_dogs_vs_cats_small")
def make_subset(subset_name, start_index, end_index):
    """Copy a slice of cat/dog images into new_base_dir/<subset_name>/<category>.

    Parameters
    ----------
    subset_name : str
        Name of the split directory to create ("train", "validation", "test").
    start_index, end_index : int
        Half-open range of image indices to copy for each category.

    Reads from the module-level ``original_dir`` and writes under
    ``new_base_dir``.
    """
    for category in ("cat", "dog"):
        # Renamed from `dir` to avoid shadowing the builtin; exist_ok=True
        # prevents a FileExistsError when the notebook cell is re-run.
        subset_dir = new_base_dir / subset_name / category
        os.makedirs(subset_dir, exist_ok=True)
        fnames = [f"{category}.{i}.jpg" for i in range(start_index, end_index)]
        for fname in fnames:
            shutil.copyfile(src=original_dir / fname,
                            dst=subset_dir / fname)
# Build the three splits: 1000 train, 500 validation and 1000 test images
# per class, taken from disjoint index ranges of the original dataset.
for split, lo, hi in (("train", 0, 1000),
                      ("validation", 1000, 1500),
                      ("test", 1500, 2500)):
    make_subset(split, start_index=lo, end_index=hi)
2. EDA: Explore the data with relevant graphs, statistics and insights (5 points)
a. Image Inspection
#To Display random samples of dogs and cats.
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
data_dir_train = pathlib.Path("kaggle_dogs_vs_cats_small/train")
categories = ["cat", "dog"]
for ax, category in zip(axes, categories):
    category_dir = os.path.join(data_dir_train, category)
    img_path = os.path.join(category_dir, choice(os.listdir(category_dir)))
    # Context manager closes the file handle (the original leaked one per image);
    # imshow copies the pixel data before the image is closed.
    with Image.open(img_path) as img:
        ax.imshow(img)
    ax.set_title(category.capitalize())
    ax.axis('off')
plt.tight_layout()
plt.show()
b. Image Dimension Analysis
#To Analyze the image resolution and aspect ratios to decide preprocessing strategies.
dimensions = []
for category in categories:
    for file in os.listdir(os.path.join(data_dir_train, category)):
        # Use a context manager so PIL releases each file handle immediately
        # (the original kept all 2000 handles open); .size is read from the
        # header without decoding the full image.
        with Image.open(os.path.join(data_dir_train, category, file)) as img:
            dimensions.append(img.size)
df_dimensions = pd.DataFrame(dimensions, columns=['Width', 'Height'])
print(df_dimensions.describe())
# Plot width vs. height
plt.scatter(df_dimensions['Width'], df_dimensions['Height'], alpha=0.5)
plt.title("Width vs Height of Images")
plt.xlabel("Width")
plt.ylabel("Height")
plt.show()
Width Height count 2000.000000 2000.000000 mean 403.972500 358.917500 std 109.258846 97.419413 min 59.000000 50.000000 25% 320.000000 300.000000 50% 442.500000 374.000000 75% 499.000000 418.000000 max 1023.000000 768.000000
Observations:
An outlier is observed in the image resolution.
The image resolutions are not consistent; hence, we resize all images to a common resolution.
#Resizing the images to a common resolution
def resize_images(data_dir, categories, target_size):
    """Resize every image under data_dir/<category> to target_size, in place.

    Parameters
    ----------
    data_dir : str or pathlib.Path
        Root directory containing one sub-folder per category.
    categories : iterable of str
        Sub-folder names to process.
    target_size : tuple of (int, int)
        (width, height) passed to PIL.Image.resize.

    Note: this overwrites the original files and does not preserve aspect ratio.
    """
    for category in categories:
        category_dir = os.path.join(data_dir, category)
        for file in os.listdir(category_dir):
            path = os.path.join(category_dir, file)
            # Resize inside the context manager so the source handle is closed
            # BEFORE saving over the same path (overwriting a still-open file
            # fails on Windows and leaked a handle per image in the original).
            with Image.open(path) as img:
                resized = img.resize(target_size)
            resized.save(path)
target_size = (180, 180)
data_dir_validation = pathlib.Path("kaggle_dogs_vs_cats_small/validation")
data_dir_test = pathlib.Path("kaggle_dogs_vs_cats_small/test")
# Normalize every split (train, then validation, then test) to the same
# 180x180 resolution in place.
for split_dir in (data_dir_train, data_dir_validation, data_dir_test):
    resize_images(split_dir, categories, target_size)
c. Aspect Ratio Analysis
#Analyze the aspect ratio (Width/Height) of images to understand if resizing will distort images.
# Ratio > 1 means landscape, < 1 portrait, == 1 square. These are the ORIGINAL
# dimensions collected before the resize step overwrote the files on disk.
df_dimensions['Aspect Ratio'] = df_dimensions['Width'] / df_dimensions['Height']
sns.histplot(df_dimensions['Aspect Ratio'], kde=True, bins=30, color='blue')
plt.title("Aspect Ratio Distribution")
plt.xlabel("Aspect Ratio (Width/Height)")
plt.ylabel("Frequency")
plt.show()
Observations :
The most common aspect ratio is above 1.0 (around 1.2-1.5), which suggests many images are in landscape orientation.
The Kernel Density Estimate (KDE) highlights a continuous distribution. It indicates no abrupt changes in the aspect ratio frequency.
d. Color Analysis
#Analyze the RGB color distributions in images to understand color differences between classes.
def calculate_color_distribution(img_path):
    """Return the mean (R, G, B) channel values of the image at img_path.

    Raises
    ------
    ValueError
        If the file cannot be read as an image. cv2.imread silently returns
        None for unreadable paths, which would otherwise surface as a cryptic
        cv2.cvtColor error.
    """
    img = cv2.imread(img_path)
    if img is None:
        raise ValueError(f"Could not read image: {img_path}")
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Mean over height and width in one pass, yielding one value per channel.
    mean_r, mean_g, mean_b = img_rgb.mean(axis=(0, 1))
    return mean_r, mean_g, mean_b
# Collect per-image mean channel values for every training image, keyed by class.
colors = {'Red': [], 'Green': [], 'Blue': [], 'Label': []}
for category in categories:
    category_dir = os.path.join(data_dir_train, category)
    for file in os.listdir(category_dir):
        mean_r, mean_g, mean_b = calculate_color_distribution(
            os.path.join(category_dir, file))
        colors['Red'].append(mean_r)
        colors['Green'].append(mean_g)
        colors['Blue'].append(mean_b)
        colors['Label'].append(category)
df_colors = pd.DataFrame(colors)
#print 20 rows of label dog and cat
for label in ('dog', 'cat'):
    print(df_colors[df_colors['Label'] == label].head(10))
# Plot color distributions
# One boxplot per RGB channel, comparing per-image channel means across classes.
plt.figure(figsize=(10, 6))
sns.boxplot(x='Label', y='Red', data=df_colors, color='red', width=0.6)
plt.title("Red Channel Distribution")
plt.show()
# NOTE(review): only the Red plot gets the explicit 10x6 figure above; the
# Green and Blue plots fall back to the default figure size.
sns.boxplot(x='Label', y='Green', data=df_colors, color='green', width=0.6)
plt.title("Green Channel Distribution")
plt.show()
sns.boxplot(x='Label', y='Blue', data=df_colors, color='blue', width=0.6)
plt.title("Blue Channel Distribution")
plt.show()
Red Green Blue Label
1000 129.639691 101.568025 119.081235 dog
1001 150.080247 138.958056 111.539259 dog
1002 165.145895 158.065525 149.267160 dog
1003 103.429938 78.858025 64.829877 dog
1004 130.327654 128.777562 127.366512 dog
1005 102.565586 103.009815 87.081605 dog
1006 136.002438 108.627500 91.863210 dog
1007 71.054259 66.878519 60.798395 dog
1008 177.645617 162.878302 149.885432 dog
1009 106.870185 104.608302 97.088333 dog
Red Green Blue Label
0 167.578642 129.802469 62.019290 cat
1 78.808889 68.296636 67.790525 cat
2 117.531759 111.890185 96.812253 cat
3 188.009383 158.842809 147.873364 cat
4 106.317099 177.333395 172.347160 cat
5 128.515278 115.941821 111.816883 cat
6 128.108642 126.455031 119.447315 cat
7 116.484907 103.630710 98.758488 cat
8 167.295802 146.718025 112.554630 cat
9 149.162531 150.892160 147.555370 cat
Observations:
All color channels have approximately similar distributions for both classes.
e. Duplicate Image Detection
#Check if there are duplicate images in the dataset by comparing hash values of images.
def calculate_image_hash(img_path):
    """Return the MD5 hex digest of the raw bytes of the file at img_path."""
    file_bytes = pathlib.Path(img_path).read_bytes()
    return hashlib.md5(file_bytes).hexdigest()
# Map each content hash to the first file seen with it; any later file with
# the same hash is recorded as a duplicate pair.
hashes = {}
duplicates = []
for category in categories:
    category_dir = os.path.join(data_dir_train, category)
    for file in os.listdir(category_dir):
        img_path = os.path.join(category_dir, file)
        digest = calculate_image_hash(img_path)
        if digest not in hashes:
            hashes[digest] = img_path
        else:
            duplicates.append((hashes[digest], img_path))
print(f"Found {len(duplicates)} duplicate images.")
Found 0 duplicate images.
f. Edge Detection Analysis
#Visualize edges in images to check if one class has more texture or details than the other.
def edge_detection(img_path):
    """Return the Canny edge map (thresholds 100/200) of the image at img_path.

    The image is loaded in grayscale. Raises ValueError if the file cannot be
    read, instead of letting cv2.Canny fail on the None that cv2.imread
    silently returns for unreadable paths.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError(f"Could not read image: {img_path}")
    edges = cv2.Canny(img, 100, 200)
    return edges
# Display edge-detected images for each category
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
for ax, category in zip(axes, categories):
    category_dir = os.path.join(data_dir_train, category)
    sample = choice(os.listdir(category_dir))
    ax.imshow(edge_detection(os.path.join(category_dir, sample)), cmap='gray')
    ax.set_title(f"Edges: {category.capitalize()}")
    ax.axis('off')
plt.tight_layout()
plt.show()
g. Image Orientation Analysis
#Count landscape vs. portrait vs. square images.
orientations = {'Landscape': 0, 'Portrait': 0, 'Square': 0}
for width, height in zip(df_dimensions['Width'], df_dimensions['Height']):
    # Classify each image by comparing width and height.
    if width == height:
        kind = 'Square'
    else:
        kind = 'Landscape' if width > height else 'Portrait'
    orientations[kind] += 1
sns.barplot(x=list(orientations.keys()), y=list(orientations.values()),
            palette='pastel', legend=False, hue=list(orientations.keys()))
plt.title("Image Orientation Distribution")
plt.ylabel("Count")
plt.show()
Observations:
Most of the images are Landscape oriented, with a few images being Portrait oriented and no Square images.
h. Dataset Diversity (Unique Backgrounds)
#Perform clustering based on average color or texture to determine dataset diversity.
from sklearn.cluster import KMeans
# Use average color as a proxy for clustering
avg_colors = df_colors[['Red', 'Green', 'Blue']].values
# fit_predict returns the same labels as .fit(...).labels_ in one call.
df_colors['Cluster'] = KMeans(n_clusters=3, random_state=42).fit_predict(avg_colors)
sns.scatterplot(data=df_colors, x='Red', y='Green', hue='Cluster', palette='deep')
plt.title("Clustering Images by Average Colors")
plt.show()
i. Heatmaps for Feature Intensity
#Create heatmaps for intensity differences between images.
def create_heatmap(img_path):
    """Return the grayscale image at img_path downsampled to 50x50 pixels.

    Raises ValueError if the file cannot be read, instead of letting
    cv2.resize fail on the None that cv2.imread silently returns.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError(f"Could not read image: {img_path}")
    return cv2.resize(img, (50, 50))  # Resize for consistency
# One random 50x50 grayscale heatmap per category, displayed side by side.
heatmaps = []
for category in categories:
    category_dir = os.path.join(data_dir_train, category)
    sample = choice(os.listdir(category_dir))
    heatmaps.append(create_heatmap(os.path.join(category_dir, sample)))
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
for ax, heatmap, category in zip(axes, heatmaps, categories):
    sns.heatmap(heatmap, ax=ax, cmap='hot')
    ax.set_title(category.capitalize())
plt.tight_layout()
plt.show()
Train two networks (use callbacks to save the best model version):
a) Define a Neural Network of your choice (3 points)
#Extracting the dataset using the image_dataset_from_directory utility.
def _load_split(split_name):
    # Helper: one batched 180x180 image dataset per subset directory.
    return image_dataset_from_directory(
        new_base_dir / split_name,
        image_size=(180, 180),
        batch_size=32)

train_dataset = _load_split("train")
validation_dataset = _load_split("validation")
test_dataset = _load_split("test")
Found 2000 files belonging to 2 classes. Found 1000 files belonging to 2 classes. Found 2000 files belonging to 2 classes.
#Defining the CNN model
inputs = keras.Input(shape=(180, 180, 3))
# Scale pixel values from [0, 255] into [0, 1].
x = layers.Rescaling(1./255)(inputs)
# Four conv+pool stages with doubling filter counts (32 -> 64 -> 128 -> 256),
# followed by one final 256-filter conv stage without pooling.
for n_filters in (32, 64, 128, 256):
    x = layers.Conv2D(filters=n_filters, kernel_size=3, activation="relu")(x)
    x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.Flatten()(x)
# Single sigmoid unit for binary cat-vs-dog classification.
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs=inputs, outputs=outputs)
model.summary()
Model: "functional"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ input_layer (InputLayer) │ (None, 180, 180, 3) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ rescaling (Rescaling) │ (None, 180, 180, 3) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ conv2d (Conv2D) │ (None, 178, 178, 32) │ 896 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ max_pooling2d (MaxPooling2D) │ (None, 89, 89, 32) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ conv2d_1 (Conv2D) │ (None, 87, 87, 64) │ 18,496 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ max_pooling2d_1 (MaxPooling2D) │ (None, 43, 43, 64) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ conv2d_2 (Conv2D) │ (None, 41, 41, 128) │ 73,856 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ max_pooling2d_2 (MaxPooling2D) │ (None, 20, 20, 128) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ conv2d_3 (Conv2D) │ (None, 18, 18, 256) │ 295,168 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ max_pooling2d_3 (MaxPooling2D) │ (None, 9, 9, 256) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ conv2d_4 (Conv2D) │ (None, 7, 7, 256) │ 590,080 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ flatten (Flatten) │ (None, 12544) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense (Dense) │ (None, 1) │ 12,545 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 991,041 (3.78 MB)
Trainable params: 991,041 (3.78 MB)
Non-trainable params: 0 (0.00 B)
#Compiling the model
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
#Defining the callbacks to save the best model
# Checkpoint keeps only the weights with the lowest validation loss seen so far.
checkpoint = keras.callbacks.ModelCheckpoint(
    filepath="./models/convnet_from_scratch.keras",
    monitor="val_loss",
    save_best_only=True)
#Training the model on training and validation datasets
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=30,
    callbacks=[checkpoint])
Epoch 1/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 43s 676ms/step - accuracy: 0.5015 - loss: 0.7158 - val_accuracy: 0.5810 - val_loss: 0.6915 Epoch 2/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 49s 774ms/step - accuracy: 0.5364 - loss: 0.6935 - val_accuracy: 0.5000 - val_loss: 0.6996 Epoch 3/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 45s 707ms/step - accuracy: 0.5359 - loss: 0.6944 - val_accuracy: 0.6330 - val_loss: 0.6640 Epoch 4/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 42s 668ms/step - accuracy: 0.5904 - loss: 0.6678 - val_accuracy: 0.6490 - val_loss: 0.6390 Epoch 5/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 42s 669ms/step - accuracy: 0.6602 - loss: 0.6353 - val_accuracy: 0.6900 - val_loss: 0.5945 Epoch 6/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 37s 578ms/step - accuracy: 0.6781 - loss: 0.6113 - val_accuracy: 0.6680 - val_loss: 0.6067 Epoch 7/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 48s 758ms/step - accuracy: 0.7141 - loss: 0.5484 - val_accuracy: 0.6680 - val_loss: 0.6855 Epoch 8/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 42s 658ms/step - accuracy: 0.7372 - loss: 0.5290 - val_accuracy: 0.6760 - val_loss: 0.5860 Epoch 9/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 65s 1s/step - accuracy: 0.7650 - loss: 0.4701 - val_accuracy: 0.7280 - val_loss: 0.5885 Epoch 10/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 57s 911ms/step - accuracy: 0.7982 - loss: 0.4507 - val_accuracy: 0.7180 - val_loss: 0.5965 Epoch 11/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 46s 718ms/step - accuracy: 0.8210 - loss: 0.3882 - val_accuracy: 0.7140 - val_loss: 0.5695 Epoch 12/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 48s 769ms/step - accuracy: 0.8593 - loss: 0.3214 - val_accuracy: 0.7220 - val_loss: 0.6364 Epoch 13/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 38s 612ms/step - accuracy: 0.8887 - loss: 0.2641 - val_accuracy: 0.7100 - val_loss: 0.7606 Epoch 14/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 48s 765ms/step - accuracy: 0.9084 - loss: 0.2264 - val_accuracy: 0.7150 - val_loss: 1.0422 Epoch 15/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 36s 568ms/step - accuracy: 0.9225 - loss: 0.1999 - val_accuracy: 0.7230 - val_loss: 1.0192 Epoch 16/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 39s 613ms/step - 
accuracy: 0.9415 - loss: 0.1485 - val_accuracy: 0.7050 - val_loss: 1.1151 Epoch 17/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 37s 591ms/step - accuracy: 0.9570 - loss: 0.1149 - val_accuracy: 0.7160 - val_loss: 1.4403 Epoch 18/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 48s 759ms/step - accuracy: 0.9673 - loss: 0.0926 - val_accuracy: 0.7270 - val_loss: 1.4034 Epoch 19/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 39s 605ms/step - accuracy: 0.9725 - loss: 0.0965 - val_accuracy: 0.7290 - val_loss: 1.2189 Epoch 20/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 36s 564ms/step - accuracy: 0.9738 - loss: 0.0746 - val_accuracy: 0.7130 - val_loss: 1.3388 Epoch 21/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 34s 548ms/step - accuracy: 0.9792 - loss: 0.0586 - val_accuracy: 0.7440 - val_loss: 1.2672 Epoch 22/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 37s 590ms/step - accuracy: 0.9859 - loss: 0.0417 - val_accuracy: 0.7140 - val_loss: 1.5079 Epoch 23/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 35s 549ms/step - accuracy: 0.9744 - loss: 0.0668 - val_accuracy: 0.7440 - val_loss: 1.6948 Epoch 24/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 35s 556ms/step - accuracy: 0.9808 - loss: 0.0425 - val_accuracy: 0.6990 - val_loss: 1.7915 Epoch 25/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 37s 592ms/step - accuracy: 0.9776 - loss: 0.0628 - val_accuracy: 0.7140 - val_loss: 1.6886 Epoch 26/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 38s 600ms/step - accuracy: 0.9839 - loss: 0.0483 - val_accuracy: 0.7420 - val_loss: 1.8417 Epoch 27/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 39s 626ms/step - accuracy: 0.9879 - loss: 0.0426 - val_accuracy: 0.7020 - val_loss: 2.0576 Epoch 28/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 35s 559ms/step - accuracy: 0.9824 - loss: 0.0583 - val_accuracy: 0.7170 - val_loss: 1.6475 Epoch 29/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 35s 551ms/step - accuracy: 0.9981 - loss: 0.0080 - val_accuracy: 0.6950 - val_loss: 2.1540 Epoch 30/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 35s 557ms/step - accuracy: 0.9903 - loss: 0.0356 - val_accuracy: 0.7160 - val_loss: 2.4697
#Plotting the training and validation accuracy and loss
hist = history.history
epochs = range(1, len(hist["accuracy"]) + 1)
# Accuracy curves: dots for training, solid line for validation.
plt.plot(epochs, hist["accuracy"], "bo", label="Training accuracy")
plt.plot(epochs, hist["val_accuracy"], "b", label="Validation accuracy")
plt.title("Training and validation accuracy")
plt.legend()
plt.figure()
# Loss curves on a separate figure.
plt.plot(epochs, hist["loss"], "bo", label="Training loss")
plt.plot(epochs, hist["val_loss"], "b", label="Validation loss")
plt.title("Training and validation loss")
plt.legend()
plt.show()
Around epoch 10-12, the training accuracy increases consistently toward 1.0, while the validation accuracy starts to fluctuate and does not follow the same trend, indicating overfitting.
#Evaluating the model on the test dataset
# Reload the checkpointed weights (best val_loss epoch) rather than the
# final-epoch weights still held in `model`.
test_model = keras.models.load_model("./models/convnet_from_scratch.keras")
test_loss, test_acc = test_model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.2f}")
63/63 ━━━━━━━━━━━━━━━━━━━━ 11s 162ms/step - accuracy: 0.7447 - loss: 0.5715 Test accuracy: 0.74
b) Fine-Tune VGG16 (pre-trained on imagenet).
Make sure to use validation to test for over-fitting. Plot the appropriate graph (3 points)
#Using a pretrained model for feature extraction
# VGG16 convolutional base pretrained on ImageNet. include_top=False drops
# VGG16's fully connected classifier head so a custom binary head can be
# attached on top of the 5x5x512 feature maps.
conv_base = keras.applications.vgg16.VGG16(
    weights="imagenet",
    include_top=False,
    input_shape=(180, 180, 3))
#Freezing the convolutional base
# With trainable=False the pretrained weights stay fixed; only the new head learns.
conv_base.trainable = False
#Model summary
conv_base.summary()
Model: "vgg16"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ input_layer_1 (InputLayer) │ (None, 180, 180, 3) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block1_conv1 (Conv2D) │ (None, 180, 180, 64) │ 1,792 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block1_conv2 (Conv2D) │ (None, 180, 180, 64) │ 36,928 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block1_pool (MaxPooling2D) │ (None, 90, 90, 64) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_conv1 (Conv2D) │ (None, 90, 90, 128) │ 73,856 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_conv2 (Conv2D) │ (None, 90, 90, 128) │ 147,584 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_pool (MaxPooling2D) │ (None, 45, 45, 128) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_conv1 (Conv2D) │ (None, 45, 45, 256) │ 295,168 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_conv2 (Conv2D) │ (None, 45, 45, 256) │ 590,080 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_conv3 (Conv2D) │ (None, 45, 45, 256) │ 590,080 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_pool (MaxPooling2D) │ (None, 22, 22, 256) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_conv1 (Conv2D) │ (None, 22, 22, 512) │ 1,180,160 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_conv2 (Conv2D) │ (None, 22, 22, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_conv3 (Conv2D) │ (None, 22, 22, 512) │ 
2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_pool (MaxPooling2D) │ (None, 11, 11, 512) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_conv1 (Conv2D) │ (None, 11, 11, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_conv2 (Conv2D) │ (None, 11, 11, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_conv3 (Conv2D) │ (None, 11, 11, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_pool (MaxPooling2D) │ (None, 5, 5, 512) │ 0 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 14,714,688 (56.13 MB)
Trainable params: 0 (0.00 B)
Non-trainable params: 14,714,688 (56.13 MB)
# Classification head on top of the frozen VGG16 base.
inputs = keras.Input(shape=(180, 180, 3))
# VGG16 expects its own preprocessing (channel reordering / mean subtraction),
# not the 1/255 rescaling used by the from-scratch model.
x = keras.applications.vgg16.preprocess_input(inputs)
x = conv_base(x)
x = layers.Flatten()(x)
# Fix: the original Dense(256) had no activation, so Dense(256) -> Dense(1)
# composed into a single affine map at inference time and the hidden layer
# added no representational power. ReLU makes it genuinely non-linear.
x = layers.Dense(256, activation="relu")(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.summary()
Model: "functional_1"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ Connected to ┃ ┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩ │ input_layer_2 │ (None, 180, 180, │ 0 │ - │ │ (InputLayer) │ 3) │ │ │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ get_item (GetItem) │ (None, 180, 180) │ 0 │ input_layer_2[0]… │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ get_item_1 │ (None, 180, 180) │ 0 │ input_layer_2[0]… │ │ (GetItem) │ │ │ │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ get_item_2 │ (None, 180, 180) │ 0 │ input_layer_2[0]… │ │ (GetItem) │ │ │ │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ stack (Stack) │ (None, 180, 180, │ 0 │ get_item[0][0], │ │ │ 3) │ │ get_item_1[0][0], │ │ │ │ │ get_item_2[0][0] │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ add (Add) │ (None, 180, 180, │ 0 │ stack[0][0] │ │ │ 3) │ │ │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ vgg16 (Functional) │ (None, 5, 5, 512) │ 14,714,688 │ add[0][0] │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ flatten_1 (Flatten) │ (None, 12800) │ 0 │ vgg16[0][0] │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ dense_1 (Dense) │ (None, 256) │ 3,277,056 │ flatten_1[0][0] │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ dropout (Dropout) │ (None, 256) │ 0 │ dense_1[0][0] │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ dense_2 (Dense) │ (None, 1) │ 257 │ dropout[0][0] │ └─────────────────────┴───────────────────┴────────────┴───────────────────┘
Total params: 17,992,001 (68.63 MB)
Trainable params: 3,277,313 (12.50 MB)
Non-trainable params: 14,714,688 (56.13 MB)
#Compiling the model
model.compile(loss="binary_crossentropy",
              optimizer="rmsprop",
              metrics=["accuracy"])
#Defining the callbacks to save the best model
# Persist only the weights with the best (lowest) validation loss.
model_callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="./models/feature_extraction.keras",
        save_best_only=True,
        monitor="val_loss"),
]
#Training the model on training and validation datasets
history = model.fit(
    x=train_dataset,
    epochs=30,
    validation_data=validation_dataset,
    callbacks=model_callbacks)
Epoch 1/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 261s 4s/step - accuracy: 0.8642 - loss: 21.1760 - val_accuracy: 0.9530 - val_loss: 7.1667 Epoch 2/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 252s 4s/step - accuracy: 0.9824 - loss: 2.5737 - val_accuracy: 0.9660 - val_loss: 3.2797 Epoch 3/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 251s 4s/step - accuracy: 0.9879 - loss: 1.4765 - val_accuracy: 0.9680 - val_loss: 4.3463 Epoch 4/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 251s 4s/step - accuracy: 0.9884 - loss: 1.5524 - val_accuracy: 0.9540 - val_loss: 9.2353 Epoch 5/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 262s 4s/step - accuracy: 0.9913 - loss: 0.8668 - val_accuracy: 0.9730 - val_loss: 3.2681 Epoch 6/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 275s 4s/step - accuracy: 0.9877 - loss: 1.2947 - val_accuracy: 0.9770 - val_loss: 4.5940 Epoch 7/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 292s 5s/step - accuracy: 0.9959 - loss: 0.3804 - val_accuracy: 0.9680 - val_loss: 5.3657 Epoch 8/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 347s 6s/step - accuracy: 0.9949 - loss: 0.7396 - val_accuracy: 0.9550 - val_loss: 9.9006 Epoch 9/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 301s 5s/step - accuracy: 0.9916 - loss: 0.5748 - val_accuracy: 0.9780 - val_loss: 4.5240 Epoch 10/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 292s 5s/step - accuracy: 0.9977 - loss: 0.2598 - val_accuracy: 0.9730 - val_loss: 5.8527 Epoch 11/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 610s 10s/step - accuracy: 0.9980 - loss: 0.1285 - val_accuracy: 0.9780 - val_loss: 4.0968 Epoch 12/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 333s 5s/step - accuracy: 0.9945 - loss: 0.5673 - val_accuracy: 0.9760 - val_loss: 4.6668 Epoch 13/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 436s 7s/step - accuracy: 0.9997 - loss: 0.0396 - val_accuracy: 0.9760 - val_loss: 4.6181 Epoch 14/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 357s 6s/step - accuracy: 0.9976 - loss: 0.0645 - val_accuracy: 0.9760 - val_loss: 4.7599 Epoch 15/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 317s 5s/step - accuracy: 0.9981 - loss: 0.3607 - val_accuracy: 0.9770 - val_loss: 4.6409 Epoch 16/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 310s 5s/step - accuracy: 0.9974 - loss: 0.1133 
- val_accuracy: 0.9720 - val_loss: 7.3503 Epoch 17/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 331s 5s/step - accuracy: 0.9987 - loss: 0.1584 - val_accuracy: 0.9750 - val_loss: 4.5434 Epoch 18/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 414s 7s/step - accuracy: 1.0000 - loss: 1.4760e-19 - val_accuracy: 0.9750 - val_loss: 4.5434 Epoch 19/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 435s 7s/step - accuracy: 0.9991 - loss: 0.0526 - val_accuracy: 0.9720 - val_loss: 5.4401 Epoch 20/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 459s 7s/step - accuracy: 0.9995 - loss: 0.0111 - val_accuracy: 0.9770 - val_loss: 4.1075 Epoch 21/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 465s 7s/step - accuracy: 0.9963 - loss: 0.3711 - val_accuracy: 0.9800 - val_loss: 4.3437 Epoch 22/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 472s 8s/step - accuracy: 0.9973 - loss: 0.5296 - val_accuracy: 0.9820 - val_loss: 4.4067 Epoch 23/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 517s 8s/step - accuracy: 0.9984 - loss: 0.1685 - val_accuracy: 0.9750 - val_loss: 5.2391 Epoch 24/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 629s 10s/step - accuracy: 0.9970 - loss: 0.1161 - val_accuracy: 0.9780 - val_loss: 4.8676 Epoch 25/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 3910s 63s/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 0.9780 - val_loss: 4.8676 Epoch 26/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 170s 3s/step - accuracy: 1.0000 - loss: 6.5571e-34 - val_accuracy: 0.9780 - val_loss: 4.8676 Epoch 27/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 910s 15s/step - accuracy: 1.0000 - loss: 6.7279e-30 - val_accuracy: 0.9780 - val_loss: 4.8676 Epoch 28/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 760s 12s/step - accuracy: 1.0000 - loss: 8.4425e-37 - val_accuracy: 0.9780 - val_loss: 4.8676 Epoch 29/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 811s 13s/step - accuracy: 1.0000 - loss: 1.4601e-27 - val_accuracy: 0.9780 - val_loss: 4.8676 Epoch 30/30 63/63 ━━━━━━━━━━━━━━━━━━━━ 504s 8s/step - accuracy: 0.9993 - loss: 0.0233 - val_accuracy: 0.9800 - val_loss: 4.7385
#Evaluate the model on the test dataset
# Reload the best checkpoint (lowest val_loss) saved during feature-extraction
# training, then score it on the held-out test split.
test_model = keras.models.load_model("./models/feature_extraction.keras")
test_loss, test_acc = test_model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")
63/63 ━━━━━━━━━━━━━━━━━━━━ 312s 5s/step - accuracy: 0.9715 - loss: 4.5783 Test accuracy: 0.972
# Accuracy and loss curves for the feature-extraction run.
metrics = history.history
num_epochs = len(metrics["accuracy"])
xs = range(1, num_epochs + 1)
plt.plot(xs, metrics["accuracy"], "bo", label="Training accuracy")
plt.plot(xs, metrics["val_accuracy"], "b", label="Validation accuracy")
plt.title("Training and validation accuracy")
plt.legend()
plt.figure()
plt.plot(xs, metrics["loss"], "bo", label="Training loss")
plt.plot(xs, metrics["val_loss"], "b", label="Validation loss")
plt.title("Training and validation loss")
plt.legend()
plt.show()
Fine-tuning the model pretrained on ImageNet
# Inspect the VGG16 base (still fully frozen at this point) before fine-tuning.
conv_base.summary()
Model: "vgg16"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ input_layer_1 (InputLayer) │ (None, 180, 180, 3) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block1_conv1 (Conv2D) │ (None, 180, 180, 64) │ 1,792 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block1_conv2 (Conv2D) │ (None, 180, 180, 64) │ 36,928 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block1_pool (MaxPooling2D) │ (None, 90, 90, 64) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_conv1 (Conv2D) │ (None, 90, 90, 128) │ 73,856 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_conv2 (Conv2D) │ (None, 90, 90, 128) │ 147,584 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block2_pool (MaxPooling2D) │ (None, 45, 45, 128) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_conv1 (Conv2D) │ (None, 45, 45, 256) │ 295,168 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_conv2 (Conv2D) │ (None, 45, 45, 256) │ 590,080 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_conv3 (Conv2D) │ (None, 45, 45, 256) │ 590,080 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block3_pool (MaxPooling2D) │ (None, 22, 22, 256) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_conv1 (Conv2D) │ (None, 22, 22, 512) │ 1,180,160 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_conv2 (Conv2D) │ (None, 22, 22, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_conv3 (Conv2D) │ (None, 22, 22, 512) │ 
2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block4_pool (MaxPooling2D) │ (None, 11, 11, 512) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_conv1 (Conv2D) │ (None, 11, 11, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_conv2 (Conv2D) │ (None, 11, 11, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_conv3 (Conv2D) │ (None, 11, 11, 512) │ 2,359,808 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ block5_pool (MaxPooling2D) │ (None, 5, 5, 512) │ 0 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 14,714,688 (56.13 MB)
Trainable params: 0 (0.00 B)
Non-trainable params: 14,714,688 (56.13 MB)
# Fine-tuning the VGG16 base pretrained on ImageNet: unfreeze the whole
# convolutional base, then re-freeze everything except the last four layers
# so only those are updated during fine-tuning.
conv_base.trainable = True

NUM_UNFROZEN_LAYERS = 4
for frozen_layer in conv_base.layers[:-NUM_UNFROZEN_LAYERS]:
    frozen_layer.trainable = False

# Show the resulting trainable/non-trainable parameter split.
model.summary()
Model: "functional_1"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ Connected to ┃ ┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩ │ input_layer_2 │ (None, 180, 180, │ 0 │ - │ │ (InputLayer) │ 3) │ │ │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ get_item (GetItem) │ (None, 180, 180) │ 0 │ input_layer_2[0]… │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ get_item_1 │ (None, 180, 180) │ 0 │ input_layer_2[0]… │ │ (GetItem) │ │ │ │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ get_item_2 │ (None, 180, 180) │ 0 │ input_layer_2[0]… │ │ (GetItem) │ │ │ │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ stack (Stack) │ (None, 180, 180, │ 0 │ get_item[0][0], │ │ │ 3) │ │ get_item_1[0][0], │ │ │ │ │ get_item_2[0][0] │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ add (Add) │ (None, 180, 180, │ 0 │ stack[0][0] │ │ │ 3) │ │ │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ vgg16 (Functional) │ (None, 5, 5, 512) │ 14,714,688 │ add[0][0] │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ flatten_1 (Flatten) │ (None, 12800) │ 0 │ vgg16[0][0] │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ dense_1 (Dense) │ (None, 256) │ 3,277,056 │ flatten_1[0][0] │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ dropout (Dropout) │ (None, 256) │ 0 │ dense_1[0][0] │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ dense_2 (Dense) │ (None, 1) │ 257 │ dropout[0][0] │ └─────────────────────┴───────────────────┴────────────┴───────────────────┘
Total params: 21,269,316 (81.14 MB)
Trainable params: 10,356,737 (39.51 MB)
Non-trainable params: 7,635,264 (29.13 MB)
Optimizer params: 3,277,315 (12.50 MB)
# Compile for binary classification; a very small learning rate keeps the
# fine-tuned VGG16 weights close to their pretrained values.
model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=1e-5),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Checkpoint callback: keep only the weights with the best validation loss.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath="./models/fine_tuning.keras",
    monitor="val_loss",
    save_best_only=True,
)

# Train on the training set, tracking generalization on the validation set.
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=20,
    callbacks=[checkpoint_cb],
)
Epoch 1/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 499s 8s/step - accuracy: 1.0000 - loss: 6.4943e-23 - val_accuracy: 0.9800 - val_loss: 4.7385 Epoch 2/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 277s 4s/step - accuracy: 0.9984 - loss: 0.0264 - val_accuracy: 0.9810 - val_loss: 5.2788 Epoch 3/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 289s 5s/step - accuracy: 0.9998 - loss: 0.0224 - val_accuracy: 0.9760 - val_loss: 5.4395 Epoch 4/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 284s 5s/step - accuracy: 0.9998 - loss: 0.0079 - val_accuracy: 0.9790 - val_loss: 4.6269 Epoch 5/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 461s 7s/step - accuracy: 0.9988 - loss: 0.1087 - val_accuracy: 0.9820 - val_loss: 4.6418 Epoch 6/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 301s 5s/step - accuracy: 0.9995 - loss: 0.0336 - val_accuracy: 0.9760 - val_loss: 5.3993 Epoch 7/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 342s 5s/step - accuracy: 1.0000 - loss: 1.6372e-26 - val_accuracy: 0.9760 - val_loss: 5.3993 Epoch 8/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 329s 5s/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 0.9760 - val_loss: 5.3993 Epoch 9/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 1242s 20s/step - accuracy: 1.0000 - loss: 1.2752e-10 - val_accuracy: 0.9760 - val_loss: 5.3980 Epoch 10/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 266s 4s/step - accuracy: 1.0000 - loss: 1.3474e-17 - val_accuracy: 0.9760 - val_loss: 5.3980 Epoch 11/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 841s 13s/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 0.9760 - val_loss: 5.3980 Epoch 12/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 223s 4s/step - accuracy: 1.0000 - loss: 2.3338e-29 - val_accuracy: 0.9760 - val_loss: 5.3980 Epoch 13/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 294s 5s/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 0.9760 - val_loss: 5.3980 Epoch 14/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 308s 5s/step - accuracy: 1.0000 - loss: 3.5104e-36 - val_accuracy: 0.9760 - val_loss: 5.3980 Epoch 15/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 312s 5s/step - accuracy: 1.0000 - loss: 1.2627e-21 - val_accuracy: 0.9760 - val_loss: 5.3980 Epoch 16/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 9099s 
147s/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 0.9760 - val_loss: 5.3980 Epoch 17/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 193s 3s/step - accuracy: 0.9993 - loss: 0.0185 - val_accuracy: 0.9790 - val_loss: 4.8890 Epoch 18/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 243s 4s/step - accuracy: 0.9993 - loss: 0.0082 - val_accuracy: 0.9810 - val_loss: 4.9594 Epoch 19/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 288s 5s/step - accuracy: 1.0000 - loss: 7.5907e-22 - val_accuracy: 0.9810 - val_loss: 4.9594 Epoch 20/20 63/63 ━━━━━━━━━━━━━━━━━━━━ 605s 10s/step - accuracy: 1.0000 - loss: 2.6198e-37 - val_accuracy: 0.9810 - val_loss: 4.9594
# Plot training/validation accuracy and loss over the fine-tuning epochs.
hist = history.history
epoch_axis = range(1, len(hist["accuracy"]) + 1)

plt.plot(epoch_axis, hist["accuracy"], "bo", label="Training accuracy")
plt.plot(epoch_axis, hist["val_accuracy"], "b", label="Validation accuracy")
plt.title("Training and validation accuracy")
plt.legend()

plt.figure()
plt.plot(epoch_axis, hist["loss"], "bo", label="Training loss")
plt.plot(epoch_axis, hist["val_loss"], "b", label="Validation loss")
plt.title("Training and validation loss")
plt.legend()
plt.show()
# Reload the best checkpoint saved during fine-tuning and measure its
# performance on the held-out test set.
model = keras.models.load_model("./models/fine_tuning.keras")
evaluation = model.evaluate(test_dataset)
test_loss, test_acc = evaluation
print(f"Test accuracy: {test_acc:.2f}")
63/63 ━━━━━━━━━━━━━━━━━━━━ 136s 2s/step - accuracy: 0.9718 - loss: 6.4245 Test accuracy: 0.97
Explore the relative performance of the models (make sure to load the best version of each model) (6 points):
1.accuracy
2. Confusion matrix
3.precision, recall, F1-score,
4.precision-recall curve.
5.Explore specific examples in which the model failed to predict correctly.
# Reload the best checkpoint of each of the three models so the comparison
# below uses their strongest saved weights, not the last training epoch.
_model_paths = {
    "cnn": "./models/convnet_from_scratch.keras",
    "vgg": "./models/feature_extraction.keras",
    "fine_tuned": "./models/fine_tuning.keras",
}
cnn_model = keras.models.load_model(_model_paths["cnn"])
vgg_model = keras.models.load_model(_model_paths["vgg"])
fine_tuned_model = keras.models.load_model(_model_paths["fine_tuned"])
- accuracy
#To calculate the accuracy of the models
def _report_accuracy(model_name, m):
    """Evaluate *m* on the test set, print and return its (loss, accuracy).

    Extracted so the three models share one code path instead of three
    copy-pasted evaluate/print pairs.
    """
    loss, acc = m.evaluate(test_dataset)
    print(f"Test accuracy of {model_name}: {acc:.2f}")
    return loss, acc

#Model 1: Convolutional Neural Network from Scratch
test_loss_cnn, test_acc_cnn = _report_accuracy(
    "Convolutional Neural Network from Scratch", cnn_model)
#Model 2: Feature Extraction with VGG16
test_loss_vgg, test_acc_vgg = _report_accuracy(
    "Feature Extraction with VGG16", vgg_model)
#Model 3: Fine-tuning VGG16
test_loss_ft, test_acc_ft = _report_accuracy(
    "Fine-tuning VGG16", fine_tuned_model)
63/63 ━━━━━━━━━━━━━━━━━━━━ 9s 136ms/step - accuracy: 0.7507 - loss: 0.5664 Test accuracy of Convolutional Neural Network from Scratch: 0.74 63/63 ━━━━━━━━━━━━━━━━━━━━ 183s 3s/step - accuracy: 0.9734 - loss: 4.3406 Test accuracy of Feature Extraction with VGG16: 0.97 63/63 ━━━━━━━━━━━━━━━━━━━━ 179s 3s/step - accuracy: 0.9718 - loss: 6.3995 Test accuracy of Fine-tuning VGG16: 0.97
2. Confusion matrix
#To calculate the confusion matrix of the models
# NOTE(review): the original ran model.predict(test_dataset) and then iterated
# test_dataset a *second* time to collect labels.  If the dataset reshuffles
# between iterations (the image_dataset_from_directory default is
# shuffle=True), the predictions and labels end up in different orders —
# which would explain the near-chance confusion matrices in the recorded
# output while evaluate() reports ~0.97 accuracy.  Collecting labels and
# predictions in a single pass keeps them aligned.  TODO confirm how
# test_dataset was constructed.
def _aligned_labels_and_preds(m):
    """One pass over test_dataset: return (y_true, y_pred) as 1-D int arrays.

    y_pred is the thresholded (> 0.5) sigmoid output of model *m*.
    """
    trues, preds = [], []
    for batch_images, batch_labels in test_dataset:
        trues.append(batch_labels.numpy())
        # .ravel() flattens the (batch, 1) sigmoid output to 1-D.
        preds.append(m.predict(batch_images, verbose=0).ravel())
    y_true = np.concatenate(trues).astype(int)
    y_pred = (np.concatenate(preds) > 0.5).astype(int)
    return y_true, y_pred

#Model 1: Convolutional Neural Network from Scratch
y_true_cnn, y_pred_cnn = _aligned_labels_and_preds(cnn_model)
cm_cnn = confusion_matrix(y_true_cnn, y_pred_cnn)
print("Confusion matrix of Convolutional Neural Network from Scratch:")
print(cm_cnn)
#Model 2: Feature Extraction with VGG16
y_true_vgg, y_pred_vgg = _aligned_labels_and_preds(vgg_model)
cm_vgg = confusion_matrix(y_true_vgg, y_pred_vgg)
print("Confusion matrix of Feature Extraction with VGG16:")
print(cm_vgg)
#Model 3: Fine-tuning VGG16
y_true_ft, y_pred_ft = _aligned_labels_and_preds(fine_tuned_model)
cm_ft = confusion_matrix(y_true_ft, y_pred_ft)
print("Confusion matrix of Fine-tuning VGG16:")
print(cm_ft)
63/63 ━━━━━━━━━━━━━━━━━━━━ 8s 130ms/step Confusion matrix of Convolutional Neural Network from Scratch: [[518 482] [483 517]] 63/63 ━━━━━━━━━━━━━━━━━━━━ 204s 3s/step Confusion matrix of Feature Extraction with VGG16: [[525 475] [513 487]] 63/63 ━━━━━━━━━━━━━━━━━━━━ 242s 4s/step Confusion matrix of Fine-tuning VGG16: [[507 493] [509 491]]
3.precision, recall, F1-score
#To calculate the precision, recall, and F1-score of the models
# NOTE(review): predict(test_dataset) followed by a second iteration to
# collect labels can misalign predictions and labels when the dataset
# reshuffles between iterations (image_dataset_from_directory defaults to
# shuffle=True).  The recorded reports (~0.49-0.52) contradict the ~0.97
# evaluate() accuracies, which is consistent with that misalignment.
# Gathering both in a single pass keeps them paired.  TODO confirm how
# test_dataset was constructed.
def _aligned_labels_and_preds(m):
    """One pass over test_dataset: return (y_true, y_pred) as 1-D int arrays.

    y_pred is the thresholded (> 0.5) sigmoid output of model *m*.
    """
    trues, preds = [], []
    for batch_images, batch_labels in test_dataset:
        trues.append(batch_labels.numpy())
        # .ravel() flattens the (batch, 1) sigmoid output to 1-D.
        preds.append(m.predict(batch_images, verbose=0).ravel())
    y_true = np.concatenate(trues).astype(int)
    y_pred = (np.concatenate(preds) > 0.5).astype(int)
    return y_true, y_pred

#Model 1: Convolutional Neural Network from Scratch
y_true_cnn, y_pred_cnn = _aligned_labels_and_preds(cnn_model)
print("Classification report of Convolutional Neural Network from Scratch:")
print(classification_report(y_true_cnn, y_pred_cnn))
#Model 2: Feature Extraction with VGG16
y_true_vgg, y_pred_vgg = _aligned_labels_and_preds(vgg_model)
print("Classification report of Feature Extraction with VGG16:")
print(classification_report(y_true_vgg, y_pred_vgg))
#Model 3: Fine-tuning VGG16
y_true_ft, y_pred_ft = _aligned_labels_and_preds(fine_tuned_model)
print("Classification report of Fine-tuning VGG16:")
print(classification_report(y_true_ft, y_pred_ft))
63/63 ━━━━━━━━━━━━━━━━━━━━ 8s 133ms/step Classification report of Convolutional Neural Network from Scratch: precision recall f1-score support 0 0.49 0.49 0.49 1000 1 0.49 0.49 0.49 1000 accuracy 0.49 2000 macro avg 0.49 0.49 0.49 2000 weighted avg 0.49 0.49 0.49 2000 63/63 ━━━━━━━━━━━━━━━━━━━━ 147s 2s/step Classification report of Feature Extraction with VGG16: precision recall f1-score support 0 0.51 0.53 0.52 1000 1 0.52 0.50 0.51 1000 accuracy 0.52 2000 macro avg 0.52 0.52 0.51 2000 weighted avg 0.52 0.52 0.51 2000 63/63 ━━━━━━━━━━━━━━━━━━━━ 944s 15s/step Classification report of Fine-tuning VGG16: precision recall f1-score support 0 0.52 0.52 0.52 1000 1 0.52 0.51 0.51 1000 accuracy 0.52 2000 macro avg 0.52 0.52 0.52 2000 weighted avg 0.52 0.52 0.52 2000
4.precision-recall curve.
# Function to plot Precision-Recall curve
def plot_pr_curve(y_true, y_pred, model_name, ax):
    """Draw a precision-recall curve for one model on axis *ax*.

    y_true: 1-D array of 0/1 ground-truth labels.
    y_pred: continuous scores (raw sigmoid outputs), NOT thresholded labels —
            precision_recall_curve needs scores to sweep the threshold.
    """
    precision, recall, _ = precision_recall_curve(y_true, y_pred)
    average_precision = average_precision_score(y_true, y_pred)
    ax.plot(recall, precision, label=f'{model_name} (AP={average_precision:.2f})')
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_title('Precision-Recall Curve')
    ax.legend()

# NOTE(review): the original called predict(test_dataset) and then iterated
# the dataset a second time for labels; if the dataset reshuffles between
# iterations (the image_dataset_from_directory default) the two orders
# differ and the curves are meaningless.  One pass keeps scores and labels
# aligned.  TODO confirm how test_dataset was constructed.
def _aligned_labels_and_scores(m):
    """One pass over test_dataset: return (y_true, raw sigmoid scores)."""
    trues, scores = [], []
    for batch_images, batch_labels in test_dataset:
        trues.append(batch_labels.numpy())
        scores.append(m.predict(batch_images, verbose=0).ravel())
    return np.concatenate(trues), np.concatenate(scores)

# Create subplots, one PR curve per model.
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
# Model 1: Convolutional Neural Network from Scratch
y_true_cnn, y_score_cnn = _aligned_labels_and_scores(cnn_model)
plot_pr_curve(y_true_cnn, y_score_cnn, "CNN from Scratch", axes[0])
# Model 2: Feature Extraction with VGG16
y_true_vgg, y_score_vgg = _aligned_labels_and_scores(vgg_model)
plot_pr_curve(y_true_vgg, y_score_vgg, "Feature Extraction with VGG16", axes[1])
# Model 3: Fine-tuning VGG16
y_true_ft, y_score_ft = _aligned_labels_and_scores(fine_tuned_model)
plot_pr_curve(y_true_ft, y_score_ft, "Fine-tuning VGG16", axes[2])
# Adjust layout and display the plot
plt.tight_layout()
plt.show()
63/63 ━━━━━━━━━━━━━━━━━━━━ 8s 127ms/step 63/63 ━━━━━━━━━━━━━━━━━━━━ 128s 2s/step 63/63 ━━━━━━━━━━━━━━━━━━━━ 176s 3s/step
5.Explore specific examples in which the model failed to predict correctly.
#To explore specific examples in which the model failed to predict correctly
#Model 1: Convolutional Neural Network from Scratch
# NOTE(review): the original compared predictions of shape (N, 1) against
# labels of shape (N,), which broadcasts into an (N, N) boolean matrix, and
# it then used per-image indices to select *batches* of test_dataset — both
# wrong.  Walking the dataset once keeps each image, its label and its
# prediction aligned, and we stop as soon as 5 misses are found.
mis_images_cnn, mis_true_cnn, mis_pred_cnn = [], [], []
for batch_images, batch_labels in test_dataset:
    # Threshold the (batch, 1) sigmoid output into 1-D 0/1 predictions.
    batch_pred = (cnn_model.predict(batch_images, verbose=0).ravel() > 0.5).astype(int)
    for img, true_lbl, pred_lbl in zip(batch_images.numpy(),
                                       batch_labels.numpy().astype(int),
                                       batch_pred):
        if true_lbl != pred_lbl and len(mis_images_cnn) < 5:
            mis_images_cnn.append(img)
            mis_true_cnn.append(true_lbl)
            mis_pred_cnn.append(pred_lbl)
    if len(mis_images_cnn) >= 5:
        break
# Display the first 5 incorrect images (or fewer if there are less than 5 incorrect images)
num_images_to_display = min(5, len(mis_images_cnn))
if num_images_to_display:
    fig, axes = plt.subplots(1, num_images_to_display, figsize=(20, 10))
    # subplots returns a bare Axes (not an array) when only one is requested.
    axes = np.atleast_1d(axes)
    for ax, img, true_lbl, pred_lbl in zip(axes, mis_images_cnn,
                                           mis_true_cnn, mis_pred_cnn):
        ax.imshow(img / 255)  # pixel values are 0-255 floats; imshow wants 0-1
        ax.set_title(f"True: {true_lbl}, Predicted: {pred_lbl}")
        ax.axis("off")
    plt.tight_layout()
    plt.show()
else:
    print("No misclassified images found.")
63/63 ━━━━━━━━━━━━━━━━━━━━ 9s 142ms/step
#Model 2: Feature Extraction with VGG16
# NOTE(review): besides the (N, 1)-vs-(N,) broadcasting bug and the
# batch/image index confusion, the original displayed images and labels from
# the *CNN* model's list (incorrect_images_cnn) while titling them with VGG
# predictions — a copy-paste bug.  This version collects the VGG model's own
# misclassified images in a single aligned pass.
mis_images_vgg, mis_true_vgg, mis_pred_vgg = [], [], []
for batch_images, batch_labels in test_dataset:
    # Threshold the (batch, 1) sigmoid output into 1-D 0/1 predictions.
    batch_pred = (vgg_model.predict(batch_images, verbose=0).ravel() > 0.5).astype(int)
    for img, true_lbl, pred_lbl in zip(batch_images.numpy(),
                                       batch_labels.numpy().astype(int),
                                       batch_pred):
        if true_lbl != pred_lbl and len(mis_images_vgg) < 5:
            mis_images_vgg.append(img)
            mis_true_vgg.append(true_lbl)
            mis_pred_vgg.append(pred_lbl)
    if len(mis_images_vgg) >= 5:
        break
# Display the first 5 incorrect images (or fewer if there are less than 5 incorrect images)
num_images_to_display = min(5, len(mis_images_vgg))
if num_images_to_display:
    fig, axes = plt.subplots(1, num_images_to_display, figsize=(20, 10))
    # subplots returns a bare Axes (not an array) when only one is requested.
    axes = np.atleast_1d(axes)
    for ax, img, true_lbl, pred_lbl in zip(axes, mis_images_vgg,
                                           mis_true_vgg, mis_pred_vgg):
        ax.imshow(img / 255)  # pixel values are 0-255 floats; imshow wants 0-1
        ax.set_title(f"True: {true_lbl}, Predicted: {pred_lbl}")
        ax.axis("off")
    plt.tight_layout()
    plt.show()
else:
    print("No misclassified images found.")
63/63 ━━━━━━━━━━━━━━━━━━━━ 196s 3s/step
#Model 3: Fine-tuning VGG16
# NOTE(review): besides the (N, 1)-vs-(N,) broadcasting bug and the
# batch/image index confusion, the original displayed images and labels from
# the *CNN* model's list (incorrect_images_cnn) while titling them with the
# fine-tuned model's predictions — a copy-paste bug.  This version collects
# the fine-tuned model's own misclassified images in a single aligned pass.
mis_images_ft, mis_true_ft, mis_pred_ft = [], [], []
for batch_images, batch_labels in test_dataset:
    # Threshold the (batch, 1) sigmoid output into 1-D 0/1 predictions.
    batch_pred = (fine_tuned_model.predict(batch_images, verbose=0).ravel() > 0.5).astype(int)
    for img, true_lbl, pred_lbl in zip(batch_images.numpy(),
                                       batch_labels.numpy().astype(int),
                                       batch_pred):
        if true_lbl != pred_lbl and len(mis_images_ft) < 5:
            mis_images_ft.append(img)
            mis_true_ft.append(true_lbl)
            mis_pred_ft.append(pred_lbl)
    if len(mis_images_ft) >= 5:
        break
# Display the first 5 incorrect images (or fewer if there are less than 5 incorrect images)
num_images_to_display = min(5, len(mis_images_ft))
if num_images_to_display:
    fig, axes = plt.subplots(1, num_images_to_display, figsize=(20, 10))
    # subplots returns a bare Axes (not an array) when only one is requested.
    axes = np.atleast_1d(axes)
    for ax, img, true_lbl, pred_lbl in zip(axes, mis_images_ft,
                                           mis_true_ft, mis_pred_ft):
        ax.imshow(img / 255)  # pixel values are 0-255 floats; imshow wants 0-1
        ax.set_title(f"True: {true_lbl}, Predicted: {pred_lbl}")
        ax.axis("off")
    plt.tight_layout()
    plt.show()
else:
    print("No misclassified images found.")
63/63 ━━━━━━━━━━━━━━━━━━━━ 165s 3s/step
- Add your conclusions. (3 point)
The Convolutional Neural Network from Scratch model achieved an accuracy of 0.74 on the test dataset.
The Feature Extraction with VGG16 model achieved an accuracy of 0.97 on the test dataset.
The Fine-tuning VGG16 model achieved an accuracy of 0.97 on the test dataset.
The Feature Extraction with VGG16 and Fine-tuning VGG16 models outperformed the Convolutional Neural Network from Scratch model, achieving higher accuracy on the test dataset.
The precision-recall curves suggest that the Fine-tuning VGG16 model has the highest average precision, followed by the Feature Extraction with VGG16 model and the Convolutional Neural Network from Scratch model. (Note: the recorded classification reports and confusion matrices are near chance, around 0.5, which contradicts the ~0.97 accuracies reported by evaluate(); this points to the predictions and labels having been collected in different orders from a reshuffling test dataset, so these per-class metrics and curves should be regenerated with aligned labels before drawing conclusions from them.)
The models failed to predict correctly on some images that were challenging due to factors such as image quality, lighting conditions, and occlusions.
Overall, the Fine-tuning VGG16 model performed the best on the test dataset, achieving the highest accuracy and average precision among the three models. The Feature Extraction with VGG16 model also performed well, with similar accuracy and average precision to the Fine-tuning VGG16 model. The Convolutional Neural Network from Scratch model had the lowest accuracy and average precision, indicating that transfer learning with pre-trained models can significantly improve model performance on image classification tasks.